% latex table generated in R 4.4.1 by xtable 1.8-4 package
% Fri Jan  2 15:22:11 2026
\begin{table}[ht]
  \centering
  \caption{Comparison of classifier performance}
  \label{tab:performance_test_heldout}
  % Classifier performance metrics for three classifiers (Dictionary, LLM,
  % Human), reported for two groups of three columns each.
  %
  % The generated header row used the raw column names "Dictionary.x",
  % "LLM.x", "Human.x", "Dictionary.y", "LLM.y", "Human.y". The ".x"/".y"
  % suffixes (presumably left over from an R merge of two result frames)
  % were printed verbatim and do not tell the reader what each group is.
  % They are replaced by a two-level header: one group label per triple,
  % with the plain classifier name underneath.
  %
  % NOTE(review): the group names "Test set" (.x) and "Held-out set" (.y)
  % and their order are inferred from the label
  % tab:performance_test_heldout -- confirm against the R script that
  % produced this table before publishing.
  \begin{tabular}{lrrrrrr}
    \toprule
    % Group header: columns 2-4 are the former ".x" columns,
    % columns 5-7 the former ".y" columns.
    & \multicolumn{3}{c}{Test set} & \multicolumn{3}{c}{Held-out set} \\
    \cmidrule(lr){2-4} \cmidrule(lr){5-7}
    Metrics & Dictionary & LLM & Human & Dictionary & LLM & Human \\
    \midrule
    Accuracy & 0.73 & 0.87 & 0.88 & 0.75 & 0.84 & 0.83 \\
    F1 (Average) & 0.73 & 0.87 & 0.87 & 0.75 & 0.84 & 0.82 \\
    F1 = 0 & 0.70 & 0.85 & 0.86 & 0.76 & 0.84 & 0.86 \\
    F1 = 1 & 0.75 & 0.88 & 0.89 & 0.74 & 0.84 & 0.78 \\
    Precision (Average) & 0.73 & 0.87 & 0.88 & 0.76 & 0.86 & 0.84 \\
    Precision = 0 & 0.71 & 0.87 & 0.88 & 0.84 & 0.97 & 0.80 \\
    Precision = 1 & 0.75 & 0.86 & 0.88 & 0.67 & 0.74 & 0.87 \\
    Recall (Average) & 0.73 & 0.87 & 0.87 & 0.76 & 0.86 & 0.81 \\
    Recall = 0 & 0.70 & 0.83 & 0.85 & 0.70 & 0.75 & 0.92 \\
    Recall = 1 & 0.76 & 0.90 & 0.90 & 0.82 & 0.97 & 0.70 \\
    Krippendorff's Alpha & 0.46 & 0.73 & 0.75 & 0.50 & 0.69 & 0.64 \\
    \bottomrule
  \end{tabular}
\end{table}
